{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## chap11 NumPy及数值开源库\n",
    "### 1 理解Python中的数据类型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 通过列表创建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 4, 2, 5, 3])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "np.array([1, 4, 2, 5, 3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 整体提升为浮点数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3.14, 4.  , 2.  , 3.  ])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array([3.14, 4, 2, 3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 2., 3., 4.], dtype=float32)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#如果希望明确设置数组的数据类型，可以用 dtype 关键字：In[10]: \n",
    "np.array([1, 2, 3, 4], dtype='float32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[range(2, 5), range(4, 7), range(6, 9)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alist = [range(i, i + 3) for i in [2, 4, 6]]\n",
    "alist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2, 3, 4],\n",
       "       [4, 5, 6],\n",
       "       [6, 7, 8]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#以下是用列表的列表初始化多维数组的一种方法：\n",
    "#In[11]: # 嵌套列表构成的多维数组\n",
    "\n",
    "np.array([range(i, i + 3) for i in [2, 4, 6]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#创建一个长度为10的数组，数组的值都是0\n",
    "np.zeros(10, dtype=int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#In[13]: # 创建一个3×5的浮点型数组，数组的值都是1\n",
    "np.ones((3, 5), dtype=float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[3.14, 3.14, 3.14, 3.14, 3.14],\n",
       "       [3.14, 3.14, 3.14, 3.14, 3.14],\n",
       "       [3.14, 3.14, 3.14, 3.14, 3.14]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#创建一个3×5的浮点型数组，数值全部填充为3.14\n",
    "np.full((3, 5), 3.14)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 数组的值是一个线性序列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.arange(0, 20, 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " - 创建一个5个元素的数组，这5个数均匀地分配到0~1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.  , 0.25, 0.5 , 0.75, 1.  ])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linspace(0, 1, 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 创建一个3×3的、在0~1均匀分布的随机数组成的数组\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.52604162, 0.69676317, 0.0440966 ],\n",
       "       [0.79877255, 0.55715097, 0.72782777],\n",
       "       [0.84998202, 0.33958346, 0.16303322]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.random((3, 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 创建一个3×3的、均值为0、方差为1的正态分布的随机数数组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.46625452,  0.28677139,  2.17237811],\n",
       "       [-0.23773541, -0.25042591, -0.61510481],\n",
       "       [ 2.35686052,  0.31767261,  1.05380974]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.normal(0, 1, (3, 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 创建一个3×3的、[0, 10)区间的随机整型数组\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 3, 2],\n",
       "       [0, 9, 1],\n",
       "       [6, 9, 5]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.randint(0, 10, (3, 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 创建一个3×3的单位矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 0., 0.],\n",
       "       [0., 1., 0.],\n",
       "       [0., 0., 1.]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.eye(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 创建一个由3个整型数组成的未初始化的数组，数组的值是内存空间中的任意值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1.])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.empty(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 2 NumPy数组基础及计算\n",
    "\n",
    "## 1)NumPy数组的属性\n",
    "\n",
    "-  首先介绍一些有用的数组属性。先定义三个随机的数组：一个一维数组、一个二维数组和一个三维数组。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(0) # 设置随机数种子"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 0, 3, 3, 7, 9])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = np.random.randint(10, size=6) # 一维数组\n",
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[3, 5, 2, 4],\n",
       "       [7, 6, 8, 8],\n",
       "       [1, 6, 7, 7]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2 = np.random.randint(10, size=(3, 4)) # 二维数组\n",
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[8, 1, 5, 9, 8],\n",
       "        [9, 4, 3, 0, 3],\n",
       "        [5, 0, 2, 3, 8],\n",
       "        [1, 3, 3, 3, 7]],\n",
       "\n",
       "       [[0, 1, 9, 9, 0],\n",
       "        [4, 7, 3, 2, 7],\n",
       "        [2, 0, 0, 4, 5],\n",
       "        [5, 6, 8, 4, 1]],\n",
       "\n",
       "       [[4, 9, 8, 1, 1],\n",
       "        [7, 9, 9, 3, 6],\n",
       "        [7, 2, 0, 3, 5],\n",
       "        [9, 4, 4, 6, 4]]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x3 = np.random.randint(10, size=(3, 4, 5)) # 三维数组\n",
    "x3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "-  每个数组有 ndim(数组的维度)、shape(数组每个维度的大小)和 size(数组的总大小)属性："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x3 ndim:  3\n",
      "x3 shape: (3, 4, 5)\n",
      "x3 size:  60\n"
     ]
    }
   ],
   "source": [
    "print(\"x3 ndim: \", x3.ndim)\n",
    "print(\"x3 shape:\", x3.shape)\n",
    "print(\"x3 size: \", x3.size)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 另外一个有用的属性是 dtype，它是数组的数据类型：\n",
    "- **对于不同的系统，结果会不一样**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dtype: int32\n"
     ]
    }
   ],
   "source": [
    "print(\"dtype:\", x3.dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 其他的属性包括表示每个数组元素字节大小的 itemsize，以及表示数组总字节大小的属性nbytes："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "itemsize: 4 bytes\n",
      "nbytes: 240 bytes\n"
     ]
    }
   ],
   "source": [
    "print(\"itemsize:\", x3.itemsize, \"bytes\")\n",
    "print(\"nbytes:\", x3.nbytes, \"bytes\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2)　数组索引：获取单个元素\n",
    "\n",
    "- 和 Python 列表一样，在一维数组中，你也可以通过中括号指定索引获取第 i 个值(从 0 开始计数)：\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 0, 3, 3, 7, 9])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1[4]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 为了获取数组的末尾索引，可以用负值索引："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1[-2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 在多维数组中，可以用逗号分隔的索来获取元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[3, 5, 2, 4],\n",
       "       [7, 6, 8, 8],\n",
       "       [1, 6, 7, 7]])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[0, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[2, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[2, -1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 也可以用以上索引方式修改元素值："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2020,    5,    2,    4],\n",
       "       [   7,    6,    8,    8],\n",
       "       [   1,    6,    7,    7]])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[0, 0] = 2020\n",
    "x2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 当你试图将一个浮点值插入一个整型数组时，浮点值会被截短成整型。 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 0, 3, 3, 7, 9])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1[0] = 3.14159 # 这将被截短\n",
    "x1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3)　数组切片：获取子数组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.arange(10)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2 3 4]\n"
     ]
    }
   ],
   "source": [
    "x[:5] # 前五个元素\n",
    "print(x[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 5, 6])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x[4:7] # 中间的子数组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 2, 4, 6, 8])"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x[::2] # 每隔一个元素，从索引 0 开始"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 3, 5, 7, 9])"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x[1::2] # 每隔一个元素，从索引1开始"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#这是一种非常方便的逆序数组的方式：\n",
    "x[::-1] # 所有元素，逆序的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 3, 1])"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x[5::-2] # 从索引5开始每隔一个元素逆序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2020,    5,    2,    4],\n",
       "       [   7,    6,    8,    8],\n",
       "       [   1,    6,    7,    7]])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#b. 多维子数组\n",
    "#多维切片也采用同样的方式处理，每一维用冒号分隔。例如：\n",
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2020,    5,    2],\n",
       "       [   7,    6,    8]])"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[:2, :3] # 两行，三列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2020,    2],\n",
       "       [   7,    8],\n",
       "       [   1,    7]])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2[:, ::2] # 所有行，每隔一列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2020,    5,    2,    4],\n",
       "       [   7,    6,    8,    8],\n",
       "       [   1,    6,    7,    7]])"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[   7,    7,    6,    1],\n",
       "       [   8,    8,    6,    7],\n",
       "       [   4,    2,    5, 2020]])"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#最后，子数组维度也可以同时被逆序：\n",
    "x2[::-1, ::-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2020    7    1]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([2020,    7,    1])"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#c. 获取数组的行和列\n",
    "print(x2[:, 0]) # x2的第一列\n",
    "x2[:,0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2020    5    2    4]\n"
     ]
    }
   ],
   "source": [
    "print(x2[0, :]) # x2的第一行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 在获取行时，出于语法的简介考虑，可以省略空的切片："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2020    5    2    4]\n"
     ]
    }
   ],
   "source": [
    "print(x2[0]) #等于x2[0, :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3 数组的计算\n",
    "\n",
    "## 1). 数组的运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2 3]\n"
     ]
    }
   ],
   "source": [
    "x = np.arange(4)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x = [0 1 2 3]\n",
      "x + 5 = [5 6 7 8]\n",
      "x - 5 = [-5 -4 -3 -2]\n",
      "x * 2 = [0 2 4 6]\n",
      "x / 2 = [0.  0.5 1.  1.5]\n",
      "x // 2 = [0 0 1 1]\n"
     ]
    }
   ],
   "source": [
    "print(\"x =\", x)\n",
    "print(\"x + 5 =\", x + 5)\n",
    "print(\"x - 5 =\", x - 5)\n",
    "print(\"x * 2 =\", x * 2)\n",
    "print(\"x / 2 =\", x / 2)\n",
    "print(\"x // 2 =\", x // 2) #地板除法运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-x =  [ 0 -1 -2 -3]\n",
      "x ** 2 =  [0 1 4 9]\n",
      "x % 2 =  [0 1 0 1]\n"
     ]
    }
   ],
   "source": [
    "#还有逻辑非、** 表示的指数运算符和 % 表示的模运算符的一元通用函数：\n",
    "print(\"-x = \", -x)\n",
    "print(\"x ** 2 = \", x ** 2) #乘方\n",
    "print(\"x % 2 = \", x % 2) #取模"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 你可以任意将这些算术运算符组合使用。当然，你得考虑这些运算符的优先级："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-1.   -2.25 -4.   -6.25]\n"
     ]
    }
   ],
   "source": [
    "print(-(0.5 * x + 1) ** 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 所有这些算术运算符都是 NumPy 内置函数的简单封装器，例如 + 运算符就是一个 add 函数的封装器："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3])"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 3, 4, 5])"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.add(x, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-2, -1,  0,  1])"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.subtract(x,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0. , 0.5, 1. , 1.5])"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.divide(x,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 1, 1], dtype=int32)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.floor_divide(x, 2) #得到的为商"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-2., -2., -1.,  0.,  1.,  1.,  2.])"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) #向下取整\n",
    "np.floor(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2). 绝对值\n",
    "\n",
    "- 正如 NumPy 能理解 Python 内置的运算操作，NumPy 也可以理解 Python 内置的绝对值函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 1, 0, 1, 2])"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.array([-2, -1, 0, 1, 2])\n",
    "abs(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 对应的 NumPy 通用函数是 np.absolute，该函数也可以用别名 np.abs 来访问："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 1, 0, 1, 2])"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.absolute(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 1, 0, 1, 2])"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.abs(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#这个通用函数也可以处理复数。当处理复数时，绝对值返回的是该复数的幅度："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5., 5., 2., 1.])"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.array([3 - 4j, 4 - 3j, 2 + 0j, 0 + 1j])\n",
    "np.abs(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## 3). 三角函数\n",
    "\n",
    "#NumPy 提供了大量好用的通用函数，其中对于数据科学家最有用的就是三角函数。首先定义一个角度数组："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.        , 1.57079633, 3.14159265])"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "theta = np.linspace(0, np.pi, 3)\n",
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 1., 0.])"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.round( np.sin(theta))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 现在可以对这些值进行一些三角函数计算："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "theta =  [0.         1.57079633 3.14159265]\n",
      "sin(theta) =  [0. 1. 0.]\n",
      "cos(theta) =  [ 1.  0. -1.]\n",
      "tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -0.00000000e+00]\n"
     ]
    }
   ],
   "source": [
    "print(\"theta = \", theta)\n",
    "print(\"sin(theta) = \",np.round(np.sin(theta)))\n",
    "print(\"cos(theta) = \",np.round( np.cos(theta)))\n",
    "print(\"tan(theta) = \", np.round(np.tan(theta),1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = [1,0,-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x =  [1, 0, -1]\n",
      "arcsin(x) =  [ 1.57079633  0.         -1.57079633]\n",
      "arccos(x) =  [0.         1.57079633 3.14159265]\n",
      "arctan(x) =  [ 0.78539816  0.         -0.78539816]\n"
     ]
    }
   ],
   "source": [
    "#这些值是在机器精度内计算的，所以有些应该是 0 的值并没有精确到 0 。逆三角函数同样可以使用：\n",
    "print(\"x = \", x)\n",
    "print(\"arcsin(x) = \", np.arcsin(x))\n",
    "print(\"arccos(x) = \", np.arccos(x))\n",
    "print(\"arctan(x) = \", np.arctan(x))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4). 指数和对数\n",
    "NumPy 中另一个常用的运算通用函数是指数运算："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x = [1, 2, 3]\n",
      "e^x = [ 2.71828183  7.3890561  20.08553692]\n",
      "2^x = [2. 4. 8.]\n",
      "3^x = [ 3  9 27]\n"
     ]
    }
   ],
   "source": [
    "x = [1, 2, 3]\n",
    "print(\"x =\", x)\n",
    "print(\"e^x =\", np.exp(x))\n",
    "print(\"2^x =\", np.exp2(x))\n",
    "print(\"3^x =\", np.power(3, x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "log2(4) =  1.0\n",
      "log10(1000) =  3.0\n",
      "ln(e**10) =  10.0\n",
      "log5(4) =  0.8613531161467861\n"
     ]
    }
   ],
   "source": [
    "print(\"log2(4) = \",np.log2(2))\n",
    "print(\"log10(1000) = \",np.log10(1000))\n",
    "print(\"ln(e**10) = \",np.log(np.e **10))\n",
    "print(\"log5(4) = \",np.log(4)/np.log(5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4 Numpy的索引和数组的排序\n",
    "### 1)索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[51 92 14 71 60 20 82 86 74 74]\n"
     ]
    }
   ],
   "source": [
    "#例如以下数组：\n",
    "import numpy as np\n",
    "rand = np.random.RandomState(42)\n",
    "x = rand.randint(100, size=10)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[71, 86, 14]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#假设我们希望获得三个不同的元素，可以用以下方式实现：\n",
    "[x[3], x[7], x[2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([71, 86, 14])"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#另外一种方法是通过传递索引的单个列表或数组来获得同样的结果：\n",
    "ind = [3, 7, 2]\n",
    "x[ind]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([71, 86, 14])"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x[[3,7,2]] #等价于上述语句"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[3 7]\n",
      " [4 5]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[71, 86],\n",
       "       [60, 20]])"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#利用丰富的索引，结果的形状与索引数组的形状一致，而不是与被索引数组的形状一致：\n",
    "ind = np.array([[3, 7],[4, 5]])\n",
    "print(ind)\n",
    "x[ind]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11]])"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#丰富的索引也对多个维度适用。假设有以下数组：\n",
    "X = np.arange(12).reshape((3, 4))\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2] [2 1 3]\n"
     ]
    }
   ],
   "source": [
    "#和标准的索引方式一样，第一个索引指的是行，第二个索引指的是列：\n",
    "#In[6]: \n",
    "row = np.array([0, 1, 2])\n",
    "col = np.array([2, 1, 3])\n",
    "print(row,col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2,  5, 11])"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[row, col]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **`np.newaxis`**\n",
    "- [np.newaxis参考](https://zhuanlan.zhihu.com/p/109094537)\n",
    "- 原来np.newaxis的作用是增加一个维度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5,)\n",
      "[1 2 3 4 5]\n"
     ]
    }
   ],
   "source": [
    "a=np.array([1,2,3,4,5])\n",
    "print(a.shape)\n",
    "print (a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5, 1)\n",
      "[[1]\n",
      " [2]\n",
      " [3]\n",
      " [4]\n",
      " [5]]\n"
     ]
    }
   ],
   "source": [
    "a=np.array([1,2,3,4,5])\n",
    "aa=a[:,np.newaxis]\n",
    "print(aa.shape)\n",
    "print (aa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 5)\n",
      "[[1 2 3 4 5]]\n"
     ]
    }
   ],
   "source": [
    "a=np.array([1,2,3,4,5])\n",
    "aa=a[np.newaxis,:]\n",
    "print(aa.shape)\n",
    "print (aa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2,  1,  3],\n",
       "       [ 6,  5,  7],\n",
       "       [10,  9, 11]])"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#因此当我们将一个列向量和一个行向量组合在一个索引中时，会得到一个二维的结果：\n",
    "X[row[:, np.newaxis], col]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0],\n",
       "       [2, 1, 3],\n",
       "       [4, 2, 6]])"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "row[:, np.newaxis] * col"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## 2)数组的排序\n",
    "- 例如，一个简单的选择排序重复寻找列表中的最小值，并且不断交换直到列表是有序的。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def selection_sort(x):\n",
    "    for i in range(len(x)):\n",
    "        swap = i + np.argmin(x[i:])\n",
    "        (x[i], x[swap]) = (x[swap], x[i])\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3, 4, 5])"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.array([2, 1, 4, 3, 5])\n",
    "selection_sort(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3, 4, 5])"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "#np.sort和np.argsort\n",
    "#In[5]: \n",
    "x = np.array([2, 1, 4, 3, 5])\n",
    "np.sort(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#如果希望用排好序的数组替代原始数组，可以使用数组的 sort 方法："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2 3 4 5]\n"
     ]
    }
   ],
   "source": [
    "x.sort()\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#另外一个相关的函数是 argsort，该函数返回的是原始数组排好序的索引值："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 0 3 2 4]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([2, 1, 4, 3, 5])\n",
    "i = np.argsort(x)\n",
    "print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 0 3 2 4]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3, 4, 5])"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#这些索引值可以被用于(通过丰富的索引)创建有序的数组\n",
    "print(i)\n",
    "x[i]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 沿着行或列排序\n",
    "-  numpy.random.RandomState()\n",
    "- numpy.random.RandomState()是一个伪随机数生成器。那么伪随机数是什么呢？\n",
    "- 伪随机数是用确定性的算法计算出来的似来自[0,1]均匀分布的随机数序列。并不真正的随机，但具有类似于随机数的统计特征，如均匀性、独立性等\n",
    "- [numpy.random.RandomState()参考文献](https://blog.csdn.net/xc_zhou/article/details/84794047?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.edu_weight&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.edu_weight)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[6 3 7 4 6 9]\n",
      " [2 6 7 4 3 7]\n",
      " [7 2 5 4 1 7]\n",
      " [5 1 4 0 9 5]]\n"
     ]
    }
   ],
   "source": [
    "rand = np.random.RandomState(42)\n",
    "X = rand.randint(0, 10, (4, 6))\n",
    "print(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2, 1, 4, 0, 1, 5],\n",
       "       [5, 2, 5, 4, 3, 7],\n",
       "       [6, 3, 7, 4, 6, 7],\n",
       "       [7, 6, 7, 4, 9, 9]])"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对X的每一列排序\n",
    "np.sort(X, axis=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[3, 4, 6, 6, 7, 9],\n",
       "       [2, 3, 4, 6, 7, 7],\n",
       "       [1, 2, 4, 5, 7, 7],\n",
       "       [0, 1, 4, 5, 5, 9]])"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "对X每一行排序\n",
    "np.sort(X, axis=1) \n",
    "#需要记住的是，这种处理方式是将行或列，当作独立的数组，任何行或列的值之间的关系将会丢失！"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## 5 NumPy的结构化数组\n",
    "- 下面介绍NumPy 的结构化数组和记录数组，它们为复合的、异构的数据提供了非常有效的存储。\n",
    "- 假定现在有关于一些人的分类数据(如姓名、年龄和体重)，我们需要存储这些数据用于 Python项目，那么一种可行的方法是将它们存在三个单独的数组中："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "name = ['Alice', 'Bob', 'Cathy', 'Doug']\n",
    "age = [25, 45, 37, 19]\n",
    "weight = [55.0, 85.5, 68.0, 61.5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#NumPy 可以用结构化数组实现这种存储，这些结构化数组是复合数据类型的。\n",
    "\n",
    "x = np.zeros(4, dtype=int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]\n"
     ]
    }
   ],
   "source": [
    "#与之类似，通过指定复合数据类型，可以构造一个结构化数组：\n",
    " # 使用复合数据结构的结构化数组\n",
    "data = np.zeros(4, dtype={'names':('name', 'age', 'weight'),\n",
    "'formats':('U10', 'i4', 'f8')})\n",
    "print(data.dtype)\n",
    "#这里 U10 表示“长度不超过 10 的 Unicode 字符串”，i4 表示“4 字节(即 32 比特)整型”，f8 表示“8 字节(即 64 比特)浮点型”。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )\n",
      " ('Doug', 19, 61.5)]\n"
     ]
    }
   ],
   "source": [
    "#现在生成了一个空的数组容器，可以将列表数据放入数组中：\n",
    "#In[5]: \n",
    "data['name'] = name\n",
    "data['age'] = age\n",
    "data['weight'] = weight\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#可以通过索引或名称查看相应的值：\n",
    "#In[6]:\n",
    "data['name'] # 获取所有名字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Alice', 25, 55.)"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0] # 获取数据第一行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Doug'"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[-1]['name'] # 获取最后一行的名字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Alice', 'Doug'], dtype='<U10')"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['age'] < 30]['name'] # 获取年龄小于30岁的人的名字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#1)　生成结构化数组\n",
    "\n",
    "np.dtype({'names':('name', 'age', 'weight'),'formats':('U10', 'i4', 'f8')})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype([('name', '<U10'), ('age', '<i8'), ('weight', '<f4')])"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#为了简明起见，数值数据类型可以用 Python 类型或 NumPy 的 dtype 类型指定：\n",
    "\n",
    "np.dtype({'names':('name', 'age', 'weight'),'formats':((np.str_, 10), int, np.float32)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#复合类型也可以是元组列表：\n",
    "\n",
    "np.dtype([('name', 'S10'), ('age', 'i4'), ('weight', 'f8')])\n",
    "#可以仅仅用一个字符串来指定它，类型用逗号分隔：\n",
    "\n",
    "np.dtype('S10,i4,f8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])\n",
      "[[0. 0. 0.]\n",
      " [0. 0. 0.]\n",
      " [0. 0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "#2)　更高级的复合类型\n",
    "#我们会创建一个数据类型，该数据类型用 mat 组件包含一个 3×3 的浮点矩阵：\n",
    " \n",
    "tp = np.dtype([('id', 'i8'), ('mat', 'f8', (3, 3))])\n",
    "X = np.zeros(1, dtype=tp)\n",
    "print(X[0])\n",
    "print(X['mat'][0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
